/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package flink.parquet.tpch;

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

import parquet.filter2.predicate.FilterPredicate;
import parquet.hadoop.ParquetInputFormat;
import parquet.hadoop.thrift.ParquetThriftInputFormat;
import parquet.hadoop.thrift.ThriftReadSupport;

import java.io.IOException;

import flink.parquet.thrift.*;
import flink.parquet.tpch.filter.*;
import parquet.io.api.Binary;

import static parquet.filter2.predicate.FilterApi.binaryColumn;
import static parquet.filter2.predicate.FilterApi.eq;
import static parquet.filter2.predicate.Operators.BinaryColumn;

/**
 * This program implements a modified version of the TPC-H query 3. The
 * example demonstrates how to assign names to fields by extending the Tuple class.
 * The original query can be found at
 * <a href="http://www.tpc.org/tpch/spec/tpch2.16.0.pdf">http://www.tpc.org/tpch/spec/tpch2.16.0.pdf</a> (page 29).
 * <p/>
 * <p/>
 * This program implements the following SQL equivalent:
 * <p/>
 * <p/>
 * <code><pre>
 * SELECT
 *      l_orderkey,
 *      SUM(l_extendedprice*(1-l_discount)) AS revenue,
 *      o_orderdate,
 *      o_shippriority
 * FROM customer,
 *      orders,
 *      lineitem
 * WHERE
 *      c_mktsegment = '[SEGMENT]'
 *      AND c_custkey = o_custkey
 *      AND l_orderkey = o_orderkey
 *      AND o_orderdate < date '[DATE]'
 *      AND l_shipdate > date '[DATE]'
 * GROUP BY
 *      l_orderkey,
 *      o_orderdate,
 *      o_shippriority;
 * </pre></code>
 * <p/>
 * <p/>
 * Compared to the original TPC-H query this version does not sort the result by revenue
 * and orderdate.
 * <p/>
 * <p/>
 * Input files are Parquet files containing the Thrift-encoded TPC-H tables
 * (lineitem, customer, orders), derived from data produced by the TPC-H data generator,
 * which is available at <a href="http://www.tpc.org/tpch/">http://www.tpc.org/tpch/</a>.
 * <p/>
 * <p/>
 * Usage: <code>TPCHQuery3Parquet &lt;lineitem-parquet path&gt; &lt;customer-parquet path&gt; &lt;orders-parquet path&gt; &lt;result 
 * path&gt;</code><br>
 * <p/>
 * <p/>
 * This example shows how to use:
 * <ul>
 * <li> custom data type derived from tuple data types
 * <li> inline-defined functions
 * <li> built-in aggregation functions
 * </ul>
 */
@SuppressWarnings("serial")
public class TPCHQuery3Parquet {

	// *************************************************************************
	//     PROGRAM
	// *************************************************************************

	/**
	 * Executes the modified TPC-H query 3 over Parquet/Thrift input.
	 *
	 * <p>Pipeline: read the three tables (with column projection and predicate
	 * push-down applied at the Parquet level), join customers with orders on
	 * custkey, join the result with lineitems on orderkey, group by
	 * (orderkey, orderdate, shippriority) and sum the revenue.
	 *
	 * @param args lineitem-parquet path, customer-parquet path, orders-parquet path, result path
	 * @throws Exception if the Flink job fails or the input paths cannot be read
	 */
	public static void main(String[] args) throws Exception {

		long startTime = System.currentTimeMillis();

		if (!parseParameters(args)) {
			return;
		}

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


		// get input data, converting the (Void, ThriftRecord) Hadoop tuples into flat Flink tuples
		DataSet<Lineitem> lineitems = getLineitemDataSet(env).map(new MapLineitems());
		DataSet<Order> orders = getOrdersDataSet(env).map(new MapOrders());
		DataSet<Customer> customers = getCustomerDataSet(env).map(new MapCustomers());

		// Join customers with orders (c_custkey = o_custkey) and package them
		// into a ShippingPriorityItem; revenue starts at 0.0 and is filled in
		// by the second join.
		DataSet<ShippingPriorityItem> customerWithOrders =
			customers.join(orders).where(0).equalTo(1)
				.with(
					new JoinFunction<Customer, Order, ShippingPriorityItem>() {
						@Override
						public ShippingPriorityItem join(Customer c, Order o) {
							return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(),
								o.getShippriority());
						}
					});

		// Join the previous result with lineitems (l_orderkey = o_orderkey) and
		// compute the per-lineitem revenue: l_extendedprice * (1 - l_discount).
		DataSet<ShippingPriorityItem> result =
			customerWithOrders.join(lineitems).where(0).equalTo(0)
				.with(
					new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {
						@Override
						public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
							i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
							return i;
						}
					})
					// Group by l_orderkey, o_orderdate and o_shippriority and compute revenue sum
				.groupBy(0, 2, 3)
				.aggregate(Aggregations.SUM, 1);

		// emit result as pipe-separated lines
		result.writeAsCsv(outputPath, "\n", "|");

		// execute program
		env.execute("TPCH Query 3 - Parquet input");

		System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
	}


	/**
	 * Result tuple of the query: (l_orderkey, revenue, o_orderdate, o_shippriority).
	 * Extending {@link Tuple4} gives the fields readable accessor names while
	 * keeping Flink's efficient tuple serialization.
	 */
	public static class ShippingPriorityItem extends Tuple4<Long, Double, String, Integer> {

		/** Default constructor required by Flink for deserialization. */
		public ShippingPriorityItem() {
		}

		public ShippingPriorityItem(Long o_orderkey, Double revenue,
									String o_orderdate, Integer o_shippriority) {
			this.f0 = o_orderkey;
			this.f1 = revenue;
			this.f2 = o_orderdate;
			this.f3 = o_shippriority;
		}

		public Long getOrderkey() {
			return this.f0;
		}

		public void setOrderkey(Long orderkey) {
			this.f0 = orderkey;
		}

		public Double getRevenue() {
			return this.f1;
		}

		public void setRevenue(Double revenue) {
			this.f1 = revenue;
		}

		public String getOrderdate() {
			return this.f2;
		}

		public Integer getShippriority() {
			return this.f3;
		}
	}

	// *************************************************************************
	//     UTIL METHODS
	// *************************************************************************

	private static String lineitemPath;
	private static String customerPath;
	private static String ordersPath;
	private static String outputPath;

	/**
	 * Parses the four required program arguments into the static path fields.
	 *
	 * @param programArguments command-line arguments
	 * @return {@code true} if exactly four paths were supplied, {@code false} otherwise
	 */
	private static boolean parseParameters(String[] programArguments) {

		if (programArguments.length > 0) {
			if (programArguments.length == 4) {
				lineitemPath = programArguments[0];
				customerPath = programArguments[1];
				ordersPath = programArguments[2];
				outputPath = programArguments[3];
			} else {
				System.err.println("Usage: TPCHQuery3Parquet <lineitem-parquet path> <customer-parquet path> <orders-parquet path> " +
					"<result path>");
				return false;
			}
		} else {
			System.err.println("This program expects data from the TPC-H benchmark as input data.\n" +
				"  Due to legal restrictions, we can not ship generated data.\n" +
				"  You can find the TPC-H data generator at http://www.tpc.org/tpch/.\n" +
				"  Usage: TPCHQuery3 <lineitem-parquet path> <customer-parquet path> <orders-parquet path> <result path>");
			return false;
		}
		return true;
	}

	/** Flattens a (Void, LineitemTable) Hadoop record into a {@link Lineitem} tuple. */
	private static final class MapLineitems implements MapFunction<Tuple2<Void, LineitemTable>, Lineitem> {

		@Override
		public Lineitem map(Tuple2<Void, LineitemTable> value) {
			Lineitem tuple = new Lineitem();
			tuple.f0 = value.f1.getORDERKEY();
			tuple.f1 = value.f1.getEXTENDEDPRICE();
			tuple.f2 = value.f1.getDISCOUNT();
			tuple.f3 = value.f1.getSHIPDATE();
			return tuple;
		}
	}

	/** Flattens a (Void, OrderTable) Hadoop record into an {@link Order} tuple. */
	private static final class MapOrders implements MapFunction<Tuple2<Void, OrderTable>, Order> {

		@Override
		public Order map(Tuple2<Void, OrderTable> value) {
			Order tuple = new Order();
			tuple.f0 = value.f1.getID();
			tuple.f1 = value.f1.getCUSTKEY();
			tuple.f2 = value.f1.getORDERDATE();
			tuple.f3 = value.f1.getSHIP_PRIORITY();
			return tuple;
		}
	}

	/** Flattens a (Void, CustomerTable) Hadoop record into a {@link Customer} tuple. */
	private static final class MapCustomers implements MapFunction<Tuple2<Void, CustomerTable>, Customer> {

		@Override
		public Customer map(Tuple2<Void, CustomerTable> value) {
			Customer tuple = new Customer();
			tuple.f0 = value.f1.getID();
			tuple.f1 = value.f1.getMKTSEGMENT();
			return tuple;
		}
	}


	/**
	 * Creates the lineitem input: a Parquet/Thrift source projected to the four
	 * columns the query needs, with a record filter applied on the ship date.
	 *
	 * @param env the Flink execution environment
	 * @return raw (Void, LineitemTable) records from the lineitem Parquet files
	 * @throws IOException if the Hadoop job configuration cannot be created
	 */
	private static DataSet<Tuple2<Void, LineitemTable>> getLineitemDataSet(ExecutionEnvironment env) throws 
		IOException {
		Job job = Job.getInstance();

		ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
		// Column projection: only read the fields the query touches.
		job.getConfiguration().set("parquet.thrift.column.filter", "ORDERKEY;EXTENDEDPRICE;DISCOUNT;SHIPDATE");

		// Parameterized types (instead of raw types) keep the (Void, LineitemTable)
		// record type checked at compile time.
		HadoopInputFormat<Void, LineitemTable> hadoopInputFormat =
			new HadoopInputFormat<Void, LineitemTable>(new ParquetThriftInputFormat<LineitemTable>(), Void.class,
				LineitemTable.class, job);

		// Filter all Lineitems with l_shipdate > 12.03.1995 (predicate lives in LineitemFilter)
		ParquetThriftInputFormat.setUnboundRecordFilter(job, LineitemFilter.class);

		ParquetThriftInputFormat.addInputPath(job, new Path(lineitemPath));

		DataSet<Tuple2<Void, LineitemTable>> data = env.createInput(hadoopInputFormat);

		return data;
	}


	/**
	 * Creates the customer input: a Parquet/Thrift source projected to two columns,
	 * with a push-down predicate selecting the "AUTOMOBILE" market segment.
	 *
	 * @param env the Flink execution environment
	 * @return raw (Void, CustomerTable) records from the customer Parquet files
	 * @throws IOException if the Hadoop job configuration cannot be created
	 */
	private static DataSet<Tuple2<Void, CustomerTable>> getCustomerDataSet(ExecutionEnvironment env) throws 
		IOException {
		Job job = Job.getInstance();

		ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
		// Column projection: only read the fields the query touches.
		job.getConfiguration().set("parquet.thrift.column.filter", "ID;MKTSEGMENT");

		HadoopInputFormat<Void, CustomerTable> hadoopInputFormat =
			new HadoopInputFormat<Void, CustomerTable>(new ParquetThriftInputFormat<CustomerTable>(), Void.class,
				CustomerTable.class, job);

		// Filter market segment "AUTOMOBILE" via Parquet predicate push-down,
		// so non-matching row groups can be skipped at read time.
		BinaryColumn mktsegment = binaryColumn("MKTSEGMENT");
		FilterPredicate mktsegmentPred = eq(mktsegment, Binary.fromString("AUTOMOBILE"));
		ParquetInputFormat.setFilterPredicate(job.getConfiguration(), mktsegmentPred);

		ParquetThriftInputFormat.addInputPath(job, new Path(customerPath));

		DataSet<Tuple2<Void, CustomerTable>> data = env.createInput(hadoopInputFormat);

		return data;
	}

	/**
	 * Creates the orders input: a Parquet/Thrift source projected to four columns,
	 * with a record filter applied on the order date.
	 *
	 * @param env the Flink execution environment
	 * @return raw (Void, OrderTable) records from the orders Parquet files
	 * @throws IOException if the Hadoop job configuration cannot be created
	 */
	private static DataSet<Tuple2<Void, OrderTable>> getOrdersDataSet(ExecutionEnvironment env) throws IOException {
		Job job = Job.getInstance();

		ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
		// Column projection: only read the fields the query touches.
		job.getConfiguration().set("parquet.thrift.column.filter", "ID;CUSTKEY;ORDERDATE;SHIP_PRIORITY");

		HadoopInputFormat<Void, OrderTable> hadoopInputFormat =
			new HadoopInputFormat<Void, OrderTable>(new ParquetThriftInputFormat<OrderTable>(), Void.class,
				OrderTable.class, job);

		ParquetThriftInputFormat.addInputPath(job, new Path(ordersPath));

		// Filter all Orders with o_orderdate < 12.03.1995 (predicate lives in OrderFilter)
		ParquetThriftInputFormat.setUnboundRecordFilter(job, OrderFilter.class);


		DataSet<Tuple2<Void, OrderTable>> data = env.createInput(hadoopInputFormat);

		return data;
	}


	/** Lineitem tuple: (l_orderkey, l_extendedprice, l_discount, l_shipdate). */
	public static class Lineitem extends Tuple4<Long, Double, Double, String> {

		public Long getOrderkey() {
			return this.f0;
		}

		public Double getExtendedprice() {
			return this.f1;
		}

		public Double getDiscount() {
			return this.f2;
		}

		public String getShipdate() {
			return this.f3;
		}
	}

	/** Customer tuple: (c_custkey, c_mktsegment). */
	public static class Customer extends Tuple2<Long, String> {

		public Long getCustKey() {
			return this.f0;
		}

		public String getMktsegment() {
			return this.f1;
		}
	}

	/** Order tuple: (o_orderkey, o_custkey, o_orderdate, o_shippriority). */
	public static class Order extends Tuple4<Long, Long, String, Integer> {

		public Long getOrderKey() {
			return this.f0;
		}

		public Long getCustKey() {
			return this.f1;
		}

		public String getOrderdate() {
			return this.f2;
		}

		public int getShippriority() {
			return this.f3;
		}
	}

}
